#note: the script must be run in sequentially to ensure that 
    #propert subsample is attached for each analysis

setwd("C:\\Documents and Settings\\John\\My Documents\\Server\\Tree_Submission\\JELS\\Replication")

rm(list = ls())     # remove all stored objects   
library(rpart) #library for running trees
library(arm) #for using "attach.all" function
library(foreign)    # allow for import of STATA datset

search.court <-read.dta("search_data_replication.dta", convert.underscore = T)
search.court$except <- ifelse(search.court$except >= 1, 1,0) #turn except count variable into binary 0,1
search.court$sctdec <- ifelse(search.court$sctdec == 1, 0, 1)#switch DV so that 1 is a liberal decision
    #consistent w/ Benesh data
search.court$dec <- factor(search.court$sctdec, levels=0:1, labels=c("R", "U")) #use this factor for DV 
    # 0 = reasonable
    #1 = unreasonable
attach.all(search.court)

############################################
############################################
# Figure 5, Period 1: #1962-1972 terms
############################################
############################################

keep <- term <= 72
serc1<-rpart(dec~lctinc + lctaft + warrant + house + person + business + car + full + 
    except, cp=.01, subset = keep, minsplit= 15)
printcp(serc1, digits = 2) #print cp table
post(serc1, file = "") #plot tree in graphics window

#Prune Tree - use .p to indicate pruned trees
serc1.p <- prune(serc1, cp = .02)
printcp(serc1.p, digits = 2)
  #PLOT TREE 
post(serc1.p, file = "")#note: numbers under nodes need to be reversed to present
                        #modal category 1st 
                        # the default is level 1/level 2, i.e. R/U
#Resubstitution Predictions
re.serc1 <- ifelse((predict(serc1.p, type="vector")) == 2, 1, 0)#Resubsitution prediction
    #Recode prediction vector to 0,1 not 1,2
table(re.serc1,dec[keep])

#Proportional reduction in error
#100 x [(% correctly predicted - % in modal category )/(100%-% in modal category.]
modal.cat.serc1 <- ifelse((length(sctdec[keep])-sum(sctdec[keep]))/length(sctdec[keep]) > .5, #since modal cat is 0
   (length(sctdec[keep])-sum(sctdec[keep]))/length(sctdec[keep]),                            #need ifelse command
   1 - (length(sctdec[keep])-sum(sctdec[keep]))/length(sctdec[keep]))                       
cor.re.serc1 <- ifelse(re.serc1==sctdec[keep],1,0)
pred.cor.re.serc1  <- sum(cor.re.serc1)/length(sctdec[keep])
pred.cor.re.serc1 #Resubstitution prediction rate
pre.re.serc1 <- 100*((100*pred.cor.re.serc1 - 100*modal.cat.serc1)/(100-100*modal.cat.serc1))
pre.re.serc1 #Resubstitution PRE

#Cross-Validated Prediction
    #simulate 1000 times and take mean
n.sims <- 100
pre.xv.serc1 <- rep(NA, length(n.sims))
pred.correct <- rep(NA, length(n.sims))
for (i in 1:n.sims){
xv.serc1 <- ifelse((xpred.rpart(serc1.p, xval=10, cp = .02)) == 2, 1,0) #cross-validated prediction; 
    #recode prediction vector to 0,1 not 1,2
correct.xv.serc1 <- ifelse(xv.serc1==sctdec[keep],1,0)
pred.cor.xv.serc1  <- sum(correct.xv.serc1)/length(sctdec[keep])
pred.correct[i] <- pred.cor.xv.serc1
pre.xv.serc1[i] <- 100*((100*pred.cor.xv.serc1 - 100*modal.cat.serc1)/(100-100*modal.cat.serc1))
}
print(summary(pred.correct)) # Cross-validated prediction rate 
print(summary(pre.xv.serc1)) # Cross-validated PRE

############################################
############################################
# Figure 6
############################################
############################################

#create a property variable and run tree just on full search and property

property.dummy <- ifelse(house == 1 | car==1| business == 1| person ==1, 1,0)
serc1.prop<-rpart(dec~property.dummy + full, cp=.0001, subset = keep)
printcp(serc1.prop, digits = 2)
post(serc1.prop, file = "")
re.serc.property <- ifelse((predict(serc1.prop, type="vector")) == 2, 1, 0)#resubstitution rate

#plot partitition with actual cases statistics
#get counts for each "region" -- starting w/ bottom left, and going clockwise
keep <- term<=72
region.1 <- ifelse(full==0 & property.dummy==0 & keep, 1,0)#identify cases in region 1 
sum.actual.1 <- sum(region.1) #get total number of cases in region 1 in 1st period, using [keep]
correct.region.1 <- ifelse(re.serc.property[region.1==1] == sctdec[region.1==1 & keep], 1,0)
sum.correct.region.1 <- sum(correct.region.1)

region.2 <- ifelse(full==1 & property.dummy==0 & keep, 1,0)#identify cases in region 2
sum.actual.2 <- sum(region.2) 
correct.region.2 <- ifelse(re.serc.property[region.2==1] == sctdec[region.2==1 & keep], 1,0)
sum.correct.region.2 <- sum(correct.region.2)

region.3 <- ifelse(full==1 & property.dummy==1 & keep, 1,0)#identify cases in region 3 
sum.actual.3 <- sum(region.3) 
correct.region.3 <- ifelse(re.serc.property[region.3==1] == sctdec[region.3==1 & keep], 1,0)
sum.correct.region.3 <- sum(correct.region.3)

region.4 <- ifelse(full==0 & property.dummy==1 & keep, 1,0)#identify cases in region 4 
sum.actual.4 <- sum(region.4) 
correct.region.4 <- ifelse(re.serc.property[region.4==1] == sctdec[region.4==1 & keep], 1,0)
sum.correct.region.4 <- sum(correct.region.4)

print(sum.actual.1)
print(sum.correct.region.1)
print(sum.actual.2)
print(sum.correct.region.2)
print(sum.actual.3)
print(sum.correct.region.3)
print(sum.actual.4)
print(sum.correct.region.4)

pdf("Figure_6.pdf", height = 4, width = 4)

par(mfrow = c(1,1), mar=c(5,4,1,1), pty = "s")#make plot square
plot(0, 0, xlab="",ylab="", type="n", axes = F, 
    xaxs = "i", yaxs = "i", xlim = c(0,10), ylim = c(0,10)) # call empty plot
box()
axis(2, at = c(2.5,7.5), tick = T, label = c("No","Yes"), las = 1)
mtext("Full Search?", 2, line =2.5, cex = 1.2, font = 2)
axis(1, at = c(2.5,7.5), tick = T, label = c("No","Yes"), las = 1)
mtext("Property Interest?", 1, line = 2.5, cex = 1.2, font = 2)
#indicate exclude area with shading
polygon(x=c(5, 10, 10,5),
    y = c(5,5,10,10), #left-hand block of "not liable" region
    col= "gray90",  border=T)
abline(v=5, lty = 2)
abline(h=5, lty =2 )
#add counts for each region: first number of cases that tree correctly predicts,
    #then number of cases that fall into each region
text(2.5,2.5, "0 / 0")
text(2.5,7.5, "5 / 7")
text(7.5,7.5, "33 / 49")
text(7.5,2.5, "8 / 9")

dev.off()

################################################################
################################################################
#Figure 7: Period 2, 1962-1976 terms
################################################################
################################################################

detach()
attach.all(search.court)
keep <- term<=76

serc2<-rpart(dec~lctinc + lctaft + warrant + house + person + business + car + full + except, 
    cp=.0000001, subset = keep, minsplit = 15)
printcp(serc2, digits = 2)
post(serc2, file = "")

#Prune Tree - use .p to indicate pruned trees
serc2.p <- prune(serc2, cp = .04)
printcp(serc2.p, digits =2)
  #PLOT TREE
post(serc2.p, file = "") #Note: tree is flipped in paper for consistency with Figure 5
                         #i.e. no and yes split in the same direction from 1st node
#Resubstitution Predictions
re.serc2 <- ifelse((predict(serc2.p, type="vector")) == 2, 1, 0)#Resubsitution prediction
    #Recode prediction vector to 0,1 not 1,2
table(re.serc2,dec[keep])

#Proportional reduction in error
#100 x [(% correctly predicted - % in modal category )/(100%-% in modal category.]
modal.cat.serc2 <- ifelse((length(sctdec[keep])-sum(sctdec[keep]))/length(sctdec[keep]) > .5, #since modal cat is 0
   (length(sctdec[keep])-sum(sctdec[keep]))/length(sctdec[keep]),                            #need ifelse command
   1 - (length(sctdec[keep])-sum(sctdec[keep]))/length(sctdec[keep]))                       
cor.re.serc2 <- ifelse(re.serc2==sctdec[keep],1,0)
pred.cor.re.serc2  <- sum(cor.re.serc2)/length(sctdec[keep])
pred.cor.re.serc2 #Resubstitution prediction rate
pre.re.serc2 <- 100*((100*pred.cor.re.serc2 - 100*modal.cat.serc2)/(100-100*modal.cat.serc2))
pre.re.serc2  #Resubstitution PRE

#Cross-Validated Prediction
    #simulate 1000 times and take mean
n.sims <- 100
pre.xv.serc2 <- rep(NA, length(n.sims))
pred.correct <- rep(NA, length(n.sims))
for (i in 1:n.sims){
xv.serc2 <- ifelse((xpred.rpart(serc2.p, xval=10, cp = .001)) == 2, 1, 0) #cross-validated prediction; 
    #recode prediction vector to 0,1 not 1,2
#table(xv.serc2,dec[keep])
#xv.serc2 Ex Ad
#       0 24 18
#       1 12 11
#Proportional reduction in error
correct.xv.serc2 <- ifelse(xv.serc2==sctdec[keep],1,0)
pred.cor.xv.serc2  <- sum(correct.xv.serc2)/length(sctdec[keep])
pred.correct[i] <- pred.cor.xv.serc2
pre.xv.serc2[i] <- 100*((100*pred.cor.xv.serc2 - 100*modal.cat.serc2)/(100-100*modal.cat.serc2))
}
print(summary(pred.correct)) #Cross-validated prediction rate 
print(summary(pre.xv.serc2)) #Cross-validated PRE

################################################################
################################################################
#Figure 8
################################################################
################################################################

property.dummy <- ifelse(house == 1 | car==1| business == 1| person ==1, 1,0)
serc2.prop<-rpart(dec~property.dummy + except, cp=.01, subset = keep)
printcp(serc2.prop, digits = 2)
post(serc2.prop, file = "")
re.serc.property <- ifelse((predict(serc2.prop, type="vector")) == 2, 1, 0)#resubstitution rate

#plot partitition with actual cases statistics
#get counts for each "region" -- starting w/ bottom left, and going clockwise
keep <- term<=76
region.1 <- ifelse(except==0&property.dummy==0 & keep, 1,0)#identify cases in region 1 
sum.actual.1 <- sum(region.1) #get total number of cases in region 1 in 1st period, using [keep]
correct.region.1 <- ifelse(re.serc.property[region.1==1] == sctdec[region.1==1 & keep], 1,0)
sum.correct.region.1 <- sum(correct.region.1)

region.2 <- ifelse(except==1&property.dummy==0 & keep, 1,0)#identify cases in region 1 
sum.actual.2 <- sum(region.2) 
correct.region.2 <- ifelse(re.serc.property[region.2==1] == sctdec[region.2==1 & keep], 1,0)
sum.correct.region.2 <- sum(correct.region.2)

region.3 <- ifelse(except==1&property.dummy==1 & keep, 1,0)#identify cases in region 1 
sum.actual.3 <- sum(region.3) 
correct.region.3 <- ifelse(re.serc.property[region.3==1] == sctdec[region.3==1 & keep], 1,0)
sum.correct.region.3 <- sum(correct.region.3)

region.4 <- ifelse(except==0&property.dummy==1 & keep, 1,0)#identify cases in region 1 
sum.actual.4 <- sum(region.4) 
correct.region.4 <- ifelse(re.serc.property[region.4==1] == sctdec[region.4==1 & keep], 1,0)
sum.correct.region.4 <- sum(correct.region.4)

print(sum.actual.1)
print(sum.correct.region.1)
print(sum.actual.2)
print(sum.correct.region.2)
print(sum.actual.3)
print(sum.correct.region.3)
print(sum.actual.4)
print(sum.correct.region.4)

pdf("Figure_8.pdf", height = 4, width = 4)

par(mfrow = c(1,1), mar=c(5,4,1,1), pty = "s")#make plot square
plot(0, 0, xlab="",ylab="", type="n", axes = F, 
    xaxs = "i", yaxs = "i", xlim = c(0,10), ylim = c(0,10)) # call empty plot
box()
axis(2, at = c(2.5,7.5), tick = T, label = c("No","Yes"), las = 1)
mtext("Exception?", 2, line =2.5, cex = 1.2, font =2)
axis(1, at = c(2.5,7.5), tick = T, label = c("No","Yes"), las = 1)
mtext("Property Interest?", 1, line = 2.5, cex = 1.2, font =2)
#indicate exclude area with shading
polygon(x=c(5, 10, 10,5),
    y = c(5,5,0,0), #left-hand block of "not liable" region
    col= "gray90",  border=T)
abline(v=5, lty = 2)
abline(h=5, lty =2 )
#add counts for each region: first number of cases that tree correctly predicts,
    #then number of cases that fall into each region
text(2.5,2.5, "5 / 7")
text(2.5,7.5, "1 / 1")
text(7.5,7.5, "20 / 26")
text(7.5,2.5, "33 / 55")

dev.off()

###############################################################################################
#Figure 9, Period 3: 1962-1985 terms
#####################################################################################
detach()
attach.all(search.court)
keep <- term<=85

serc3<-rpart(dec~lctinc + lctaft + warrant + house + person + business + car + full + except, 
    cp=.0000001, subset = keep, minsplit = 15)
printcp(serc3, digits = 2)
post(serc3, file = "")

#Prune Tree - use .p to indicate pruned trees
serc3.p <- prune(serc3, cp = .015)
printcp(serc3.p, digits = 2)
  #Plot Tree
post(serc3.p, file = "") ##Note: tree is flipped in paper for consistency with Figure 5
                         #i.e. no and yes split in the same direction from 1st node
#Resubstitution Predictions
re.serc3 <- ifelse((predict(serc3.p, type="vector")) == 2, 1, 0)#Resubsitution prediction
    #Recode prediction vector to 0,1 not 1,2
table(re.serc3,dec[keep])# = 70% correct

#Proportional reduction in error
#100 x [(% correctly predicted - % in modal category )/(100%-% in modal category.]
modal.cat.serc3 <- ifelse((length(sctdec[keep])-sum(sctdec[keep]))/length(sctdec[keep]) > .5, #since modal cat is 0
   (length(sctdec[keep])-sum(sctdec[keep]))/length(sctdec[keep]),                            #need ifelse command
   1 - (length(sctdec[keep])-sum(sctdec[keep]))/length(sctdec[keep]))                       
cor.re.serc3 <- ifelse(re.serc3==sctdec[keep],1,0)
pred.cor.re.serc3  <- sum(cor.re.serc3)/length(sctdec[keep])
pred.cor.re.serc3 #Resubstitution prediction rate
pre.re.serc3 <- 100*((100*pred.cor.re.serc3 - 100*modal.cat.serc3)/(100-100*modal.cat.serc3))
pre.re.serc3 # Resubstitution PRE

#Cross-Validated Prediction
    #simulate 1000 times and take mean
n.sims <- 1000
pre.xv.serc3 <- rep(NA, length(n.sims))
pred.correct <- rep(NA, length(n.sims))
for (i in 1:n.sims){
xv.serc3 <- ifelse((xpred.rpart(serc3.p, xval=10, cp = .01)) == 2, 1, 0) 
#Proportional reduction in error
correct.xv.serc3 <- ifelse(xv.serc3==sctdec[keep],1,0)
pred.cor.xv.serc3  <- sum(correct.xv.serc3)/length(sctdec[keep])
pred.correct[i] <- pred.cor.xv.serc3
pre.xv.serc3[i] <- 100*((100*pred.cor.xv.serc3 - 100*modal.cat.serc3)/(100-100*modal.cat.serc3))
}
print(summary(pred.correct)) #Cross-validation prediction rate
print(summary(pre.xv.serc3)) #Cross-validation PRE
